#### TABLE A2: SAMPLE REPRESENTATIVENESS (STUDY 3)
#### Population distribution by region - Survey vs Census

# Start from an empty workspace: removes every object that ls() lists in the
# current environment before anything else in this script runs.
rm(list = ls())
# Run the shared setup script. The path is relative, so it is resolved against
# the current working directory.
# NOTE(review): presumably this attaches the packages used below (%>%,
# group_by, fread) and defines `data_path` and `tables_path`, none of which
# are defined in this file — confirm.
source("./2_code/00_setup.R")

#### CENSUS DATA ####

# Census population counts. The columns read below are `offerwise_region_1`
# (region code), `total`, `hombre` and `mujer`.
pops <- read.csv(paste0(data_path, "totals_pop.csv"))

# Denominator for every share below: the population summed over all rows of
# the file, so shares are relative to the overall total, not within region.
total_population_all_regions <- sum(pops$total)

# One row per region code: summed counts, the male/female shares of the
# overall population, and the region code recoded to its display label.
region_totals <- pops %>%
  group_by(offerwise_region_1) %>%
  summarise(
    total_population_region = sum(total),
    male_population_region = sum(hombre),
    female_population_region = sum(mujer)
  ) %>%
  mutate(
    proportion_male = male_population_region / total_population_all_regions,
    proportion_female = female_population_region / total_population_all_regions,
    # Codes outside this list become NA.
    offerwise_region_1 = factor(
      offerwise_region_1,
      levels = c(1, 2, 3, 4, 6),
      labels = c("Amazonia", "Andina", "Pacifico", "Caribe", "Orinoquia")
    )
  )

# Keep only the columns shown in the table, under their display names
# (female first, then male).
census_df <- region_totals %>%
  select(
    Region = offerwise_region_1,
    `Female Population Proportion` = proportion_female,
    `Male Population Proportion` = proportion_male
  )


#### SURVEY DATA (STUDY 3) ####

# Study 3 survey data; the first line of the file is read as the header.
data3 <- fread(paste0(data_path, "data_study3.csv"), header = TRUE)

# Cross-tabulate region (rows) by the `male` variable (columns), then convert
# the counts to shares of the whole table, so all cells together sum to one.
freq_table <- table(data3$region, data3$male)
prop_table <- prop.table(freq_table)

# NOTE(review): region labels are attached by position, so this assumes the
# table has exactly five rows sorted in the order listed here, and that
# column 1 is female and column 2 is male (presumably `male` is coded 0/1).
# Confirm against the coding of `region` and `male` in the survey file.
survey_df <- data.frame(
  Region = c("Amazonia", "Andina", "Pacifico", "Caribe", "Orinoquia"),
  "Female Population Proportion" = prop_table[, 1],
  "Male Population Proportion" = prop_table[, 2],
  check.names = FALSE
)


#### CREATE COMBINED LATEX TABLE ####

# Render both proportion columns as text with two decimal places.
# The same formatting is applied to the census and the survey data frame.
format_proportions <- function(df) {
  mutate(
    df,
    `Female Population Proportion` = sprintf("%.2f", `Female Population Proportion`),
    `Male Population Proportion` = sprintf("%.2f", `Male Population Proportion`)
  )
}

census_df <- format_proportions(census_df)
survey_df <- format_proportions(survey_df)

# Build the LaTeX table manually: a single three-column tabular with the
# census panel stacked on top of the survey panel.

# Header lines of one panel: a rule, the bold panel title spanning all three
# columns, the two-line column headings, and a closing rule.
a2_panel_header <- function(title) {
  c(
    "\\hline",
    paste0("\\multicolumn{3}{c}{\\textbf{", title, "}} \\\\"),
    " & Female Population & Male Population \\\\",
    " & Proportion & Proportion \\\\",
    "\\hline"
  )
}

# Body lines of one panel, one "Region & female & male \\" line per row of
# `df`. sprintf() is vectorised and returns character(0) for zero-row input,
# so an empty data frame adds no lines (a `1:nrow(df)` loop would iterate
# over c(1, 0) and emit bogus rows). Region is converted explicitly because
# it may be a factor.
a2_panel_rows <- function(df) {
  sprintf(
    "%s & %s & %s \\\\",
    as.character(df$Region),
    df$`Female Population Proportion`,
    df$`Male Population Proportion`
  )
}

latex_lines <- c(
  "\\begin{tabular}{lcc}",
  a2_panel_header("2018 Census: Population Distribution by Region"),
  a2_panel_rows(census_df),
  a2_panel_header("Survey 3 Sample Distribution by Region"),
  a2_panel_rows(survey_df),
  "\\hline",
  "\\end{tabular}"
)

# Write to file
writeLines(latex_lines, paste0(tables_path, "table_A2.tex"))
